********************************************************************************
** Cederman, Galano, Girardin and Schvitz. War Did Make States.
** Article prepared for International Organization
** June 20, 2022
**
** Stata do-file: data_prep_dyadic.do
** Data preparation file for dyad-level data
** Required file paths set in runall.do 
** (to be run after data_prep1 but before data_prep2)
********************************************************************************


// Should be run from runall.do which contains all relevant label definitions for files and directories
// Read the raw dyad-year table. The globals ROOT, INPUTDIR and INPUTFILED
// must already be defined (runall.do sets them) before this point.
cd $ROOT
cd $INPUTDIR

insheet using $INPUTFILED, clear

// Dyad identifiers
//   idd  : ordered code, ida*1000 + idb
//   idd2 : order-free code; whichever id is smaller goes in the thousands slot
generate idd = ida*1000 + idb
generate idd2 = cond(idb != . & idb < ida, idb*1000 + ida, idd)

sort idd year

// Dyadic geography: log of (minimum distance + 1) and a neighbor dummy that
// is 1 when the minimum distance is below 10 (missing distances code as 0)
generate ldist = ln(mindistance + 1)
generate neigh = (mindistance < 10)


// State-level covariates for State A: match on (id = ida, year) against
// statedata_intermediate and copy every covariate into an "a"-suffixed column.
generate id = ida
sort id year
cd $ROOT
cd $INTERMEDIATEDIR
// NOTE(review): no keep() option is given, so state-years that exist only in
// the using file (if any) are appended as extra rows -- confirm this is intended.
merge m:1 id year using statedata_intermediate
drop _merge

// Size measures and their logs
generate areaa = area
generate popa = population
generate lareaa = ln(areaa)
generate lpopa = ln(popa)

// Territorial growth / shrinkage measures
foreach stem in growthwar growthpeace shrinkwar shrinkpeace {
    generate `stem'a = `stem'breckearea
}

// Missing elevation SD is set to zero before it is copied
replace elevationsd = 0 if elevationsd == .
generate elevsda = elevationsd

// Remaining covariates keep their name plus the "a" suffix
foreach v in death age coastmin lurban llcentraldist lcentral {
    generate `v'a = `v'
}

// Remove the unsuffixed originals so the same names can be merged in again
local statevars area population growthwarbreckearea growthpeacebreckearea ///
    shrinkwarbreckearea shrinkpeacebreckearea elevationsd ///
    death age coastmin lurban llcentraldist lcentral
drop id `statevars'

// State-level covariates for State B: match on (id = idb, year) against
// statedata_intermediate and copy every covariate into a "b"-suffixed column.
generate id = idb
sort id year
cd $ROOT
cd $INTERMEDIATEDIR
// NOTE(review): no keep() option is given, so state-years that exist only in
// the using file (if any) are appended as extra rows -- confirm this is intended.
merge m:1 id year using statedata_intermediate
drop _merge

// Size measures and their logs
generate areab = area
generate popb = population
generate lareab = ln(areab)
generate lpopb = ln(popb)

// Territorial growth / shrinkage measures
foreach stem in growthwar growthpeace shrinkwar shrinkpeace {
    generate `stem'b = `stem'breckearea
}

// NOTE(review): unlike the State A section, missing elevationsd is NOT
// replaced by zero here before copying -- confirm the asymmetry is intended.
generate elevsdb = elevationsd

// Remaining covariates keep their name plus the "b" suffix
foreach v in death age coastmin lurban llcentraldist lcentral {
    generate `v'b = `v'
}

// Remove the unsuffixed originals
local statevars area population growthwarbreckearea growthpeacebreckearea ///
    shrinkwarbreckearea shrinkpeacebreckearea elevationsd ///
    death age coastmin lurban llcentraldist lcentral
drop id `statevars'

// Lagged log transforms of the growth and shrinkage variables.
// Declare the panel first so the l5. operator (lag of 5 units of year within
// idd) can be used.
xtset idd year

// For each measure: treat missing as zero, then build log(5-lag + 1)
foreach v in growthwara growthpeacea growthwarb growthpeaceb ///
             shrinkwara shrinkpeacea shrinkwarb shrinkpeaceb {
    replace `v' = 0 if `v' == .
    generate ll`v' = ln(l5.`v' + 1)
}

// Combined lagged totals: all growth of A, all shrinkage of B
generate llgrowtha = ln(l5.growthwara + l5.growthpeacea + 1)
generate llshrinkb = ln(l5.shrinkwarb + l5.shrinkpeaceb + 1)

// Dependent variables derived from net territorial change.

// Missing gain/loss areas count as zero
foreach v in gainaarea lossaarea {
    replace `v' = 0 if `v' == .
}

// Rebuild netgain from scratch (the copy that came with the input is discarded)
drop netgain
generate netgain = gainaarea - lossaarea

// One-sided versions: netgain0 zeroes out net losses, netloss0 zeroes out net gains
generate netgain0 = cond(netgain < 0 & netgain != ., 0, netgain)
generate netloss0 = cond(netgain > 0 & netgain != ., 0, abs(netgain))

// Logged DVs
generate lnga = ln(netgain0 + 1)
generate lnla = ln(netloss0 + 1)

// Variants in which the opposite-direction dyad-years are missing, not zero
generate lnga0 = lnga if !(netgain < 0)
generate lnla0 = lnla if !(netgain > 0)

// Direction-free magnitude of change: logged, as a dummy, and in levels
generate lnga2 = ln(abs(netgain) + 1)
generate nga21 = (lnga2 > 0 & lnga2 != .)
generate nga2 = abs(netgain)

// Binary dependent variables: any net gain (nga1) / any net loss (nla1)
generate nga1 = (netgain > 0 & netgain != .)
generate nla1 = (netgain < 0 & netgain != .)

// Event flag = net gain or net loss. The versions of event, noeventyear and
// evspline* that arrive with the input are discarded and rebuilt here.
drop event
generate event = (nga1 == 1 | nla1 == 1)
drop noeventyear
// btscs is a user-written command; here it is asked for the counter
// noeventyear plus three spline terms, which it leaves in _spline1-_spline3
btscs  event year idd, gen(noeventyear) nspline(3)
drop evspline*
forvalues k = 1/3 {
    rename _spline`k' evspline`k'
}

// Dummies with the opposite-direction dyad-years set to missing
generate nga10 = nga1 if !(netgain < 0 & netgain != .)
generate nla10 = nla1 if !(netgain > 0 & netgain != .)


// Conflict indicator: 0 when incidencebrecke is 0, 1 when it is positive,
// missing otherwise. The inc1 that came with the input is replaced.
drop inc1
generate inc1 = (incidencebrecke > 0) if incidencebrecke >= 0 & incidencebrecke != .

xtset idd year

// Five-unit lags of the geography and size variables
generate lneigh = l5.neigh
generate lldist = l5.ldist
generate llareaa = l5.lareaa
generate llareab = l5.lareab

// Variable labels used in output tables
label var lnga "log net terr. gain"
label var lneigh "neighbors"
label var lldist "log distance"
label var inc1 "conflict"
label var llareaa "log size A"
label var llareab "log size B"
label var llgrowthwara "log cumul. war gains A"
label var llgrowthpeacea "log cumul. peace gains A"
 

// Relational measures summed up to the state-year level (used as controls)

// Sum of the lagged neighbor dummy over all dyads sharing the same State A
// (resp. State B) in a year, plus log(count + 1)
bysort ida year: egen lnumneighsa = total(lneigh)
generate llnumneighsa = ln(lnumneighsa + 1)

bysort idb year: egen lnumneighsb = total(lneigh)
generate llnumneighsb = ln(lnumneighsb + 1)

// bysort changed the sort order; re-declare the panel before using lags
xtset idd year

// Lagged log elevation SD for both sides
generate llelevsda = ln(l5.elevsda + 1)
generate llelevsdb = ln(l5.elevsdb + 1)

// A's lagged log size (resp. log population) relative to the log of the
// lagged dyad total, and the interaction of the size ratio with conflict
generate llr = llareaa / ln(l5.areaa + l5.areab)
generate llrpop = l5.lpopa / ln(l5.popa + l5.popb)
generate inc1Xllr = inc1 * llr

// Store the dyadic data built so far; it is reloaded after the aggregation below
cd $ROOT
cd $INTERMEDIATEDIR
save dyad_analysis1, replace

// ---------------------------------------------------------------------------
// Aggregation to the country (State A) level
// Split each dyad-year's one-sided gain and loss by the conflict dummy:
// inc1 == 1 -> "war", inc1 == 0 -> "peace" (missing inc1 gives missing)
generate dyad_wargains    = inc1 * netgain0
generate dyad_peacegains  = (1 - inc1) * netgain0
generate dyad_warlosses   = inc1 * netloss0
generate dyad_peacelosses = (1 - inc1) * netloss0

// Sum the four components and keep the maximum of the neighbor counts per ida-year
collapse (sum) dyad_wargains dyad_peacegains dyad_warlosses dyad_peacelosses ///
         (max) llnumneighsa lnumneighsa, by(ida year)

xtset ida year

rename ida id

// Country-level file used in the country-level analysis and merged back below
cd $ROOT
cd $INTERMEDIATEDIR
save dyad_cumul.dta, replace


///////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Reload the dyadic analysis data and attach the country-level totals from
// dyad_cumul, once for State A (suffix "a") and once for State B (suffix "b").
cd $ROOT
cd $INTERMEDIATEDIR
use dyad_analysis1, clear

// --- State A -----------------------------------------------------------------
gen id = ida
merge m:1 id year using dyad_cumul
foreach v in wargains peacegains warlosses peacelosses {
    gen dyad_`v'a = dyad_`v'
}
// All four unsuffixed totals must be dropped before the State B merge.
// merge keeps the master copy of any variable that already exists, so leaving
// dyad_wargains behind (as the earlier version did) made dyad_wargainsb a
// duplicate of State A's war gains instead of State B's.
drop id _merge dyad_wargains dyad_peacegains dyad_warlosses dyad_peacelosses

// Dyad-years without a matching country-year get zero totals
foreach v in wargains peacegains warlosses peacelosses {
    replace dyad_`v'a = 0 if dyad_`v'a == .
}

// --- State B -----------------------------------------------------------------
gen id = idb
merge m:1 id year using dyad_cumul
foreach v in wargains peacegains warlosses peacelosses {
    gen dyad_`v'b = dyad_`v'
}
drop id _merge dyad_wargains dyad_peacegains dyad_warlosses dyad_peacelosses

foreach v in wargains peacegains warlosses peacelosses {
    replace dyad_`v'b = 0 if dyad_`v'b == .
}



// Onset and incidence indicators for the dyadic analysis

// 1 when State A is coded as initiator of an onset in this dyad-year
gen onsetinita1 = 0
replace onsetinita1 = 1 if onsetinitiatorabrecke > 0 & onsetinitiatorabrecke != .

// btscs is a user-written command; here it is asked for the counter pyis plus
// three spline terms, which it leaves in _spline1-_spline3
btscs onsetinita1 year idd, gen(pyis) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' pyispline`k'
}

// 1 when there is conflict in the dyad-year and A is coded as initiator
gen incinita1 = 0
replace incinita1 = 1 if inc1 == 1 & incidenceinitiatorabrecke > 0 & incidenceinitiatorabrecke != .

// As incinita1, but also 1 when A is coded as follower
gen incinitsidea1 = incinita1
replace incinitsidea1 = 1 if incidencefollowerabrecke > 0 & incidencefollowerabrecke != .

// 1 when there is conflict and B is neither initiator nor follower.
// Stata treats missing as larger than any number, so "incidencebrecke > 0" alone
// would also be true for missing incidence; the explicit != . guard (used in
// the other conditions of this file) prevents coding those rows as attacked.
gen incattackedb1 = 0
replace incattackedb1 = 1 if incidencebrecke > 0 & incidencebrecke != . ///
    & incidenceinitiatorbbrecke == 0 & incidencefollowerbbrecke == 0

// Interactions of the conflict dummy with the lagged log growth/shrink terms
foreach v in growthwara growthpeacea shrinkwarb shrinkpeaceb {
    generate inc1X`v' = inc1 * ll`v'
}

// Lagged log age (age*5 + 5) and lagged log (coastmin + 1) for each side
foreach s in a b {
    generate llage`s' = ln(l5.age`s'*5 + 5)
}
foreach s in a b {
    generate llcoastdist`s' = ln(l5.coastmin`s' + 1)
}

// Coastal dummies: 1 when coastmin is below 10 (missing codes as 0).
// NOTE(review): despite the "l" prefix these use the current-period coastmin,
// not its lag -- confirm this is intended.
foreach s in a b {
    generate lcoastal`s' = (coastmin`s' < 10)
}

// A's share of the lagged dyad total: area, war growth, peace growth
generate lr = l5.areaa / (l5.areaa + l5.areab)
generate lrwar = l5.growthwara / (l5.growthwara + l5.growthwarb)
generate lrpeace = l5.growthpeacea / (l5.growthpeacea + l5.growthpeaceb)

// Write the main dyadic analysis file
cd $ROOT
cd $INTERMEDIATEDIR
save dyad_analysis2, replace

// ---------------------------------------------------------------------------
// Country-level file: maximum of every dyad_* variable per ida-year,
// saved as cumulneigh with the state id renamed to id
collapse (max) dyad_*, by(ida year)
rename ida id
save cumulneigh, replace

// Bring the dyadic analysis data back into memory
cd $ROOT
cd $INTERMEDIATEDIR
use dyad_analysis2, clear

// The first-stage intermediate file is no longer needed
erase dyad_analysis1.dta
